#' ---
#' title: "Replication for When Information Is Not Enough for Strategic Voting"
#' subtitle: "Observational Data Analysis"
#' author: "Lukas F. Stoetzer, Benjamin Schlegel, Patrick Kraft"
#' date: "September 2023"
#' ---

# needed libraries
library(tidyverse)
library(lme4)
library(texreg)
library(kableExtra)
library(margins)
library(haven)


# Data Preparation ==================

# load data
df_bes = read_csv("00_bes_data.csv") %>% 
  filter(year == 2015)

df_wave15 = haven::read_sav("00_bes_data2.sav") %>% 
  select(noDependentsInHousehold, workingStatus, id, econPersonalRetroW2, 
         infoSourceInternetW6, infoSourceTVW6, infoSourcePaperW6,
         infoSourceRadioW6, infoSourcePeopleW6, income = profile_gross_personal) %>% mutate(
    noDependentsInHousehold = ifelse(noDependentsInHousehold==2, NA,
                                     noDependentsInHousehold),
    unemployment = recode(as.numeric(workingStatus), "4"=1, .default = 0, .missing = NA_real_),
    econPersonalBetter = (as.numeric(econPersonalRetroW2) - 1) / 4,
    econPersonalBetter = ifelse(econPersonalBetter > 1, NA, econPersonalBetter),
    veryLowIncome = ifelse(income > 15, NA, income %in% 1:2)
  )

df_bes = df_bes %>% left_join(df_wave15, by = "id")

df_bes$media_exposure = df_bes %>% select(starts_with("infoSource")) %>% mutate(
  across(everything(), function(x){ifelse(x==9999, NA, x)}),
  across(everything(), function(x){ifelse(x < 3, 0, ifelse(x==3, 0.5, 1))})
) %>% apply(1, sum, na.rm = TRUE) >= 1


# recode main variables: strategic, information, cogntive resources and Incentive
df_bes$strategic = as.numeric(df_bes$vote_code_party_feelpre_actual_8 == 2)
df_bes$information = as.numeric(df_bes$media_exposure)
df_bes$information_alternative = round(df_bes$polknow.above.median)
df_bes$cognitive_load = as.numeric(!df_bes$noDependentsInHousehold)
df_bes$cognitive_load_alternative = 1 - df_bes$educl3plus
df_bes$incentive = ifelse(df_bes$tau_party_feelpre_actual_8_cat > 5, 1, 0)

# replace don't know with NA in partyIDstreng
df_bes$partyIDstrength = ifelse(df_bes$partyIDstrength == "don't know", NA, df_bes$partyIDstrength)

# select needed variables and remove cases with incomplete data 
df_bes = df_bes %>% 
  dplyr::select(incentive, cognitive_load, information, information_alternative, strategic, 
                 age, female, partyIDstrength, cognitive_load_alternative, refno) %>% 
  na.omit

# Model Estimation =====

# multilevel model with an interaction of the three concepts and age, gender, 
# party strength as control variables and district as level 2
model = lmer(strategic ~ information * cognitive_load * incentive +
               age + female + partyIDstrength + (1 | refno), data = df_bes)

## robustness check 1 (extraneous Cognitive Resources + political knowledge)
model_robust  <-  df_bes %>%
  mutate(information = information_alternative) %>%
  lmer(strategic ~ information * cognitive_load * incentive + 
               age + female + partyIDstrength + (1 | refno), data = .)

## robustness check 2 (intrinsic Cognitive Resources + media exposure)
model_robust2 <- df_bes %>%
  mutate(cognitive_load = cognitive_load_alternative) %>%
 lmer(strategic ~ information * cognitive_load * incentive + 
                      age + female + partyIDstrength + (1 | refno), data = .)


## robustness check 3 (intrinsic Cognitive Resources + political knowledge)
model_robust3 <- df_bes %>%
  mutate(cognitive_load = cognitive_load_alternative,
         information = information_alternative) %>%
  lmer(strategic ~ information * cognitive_load * incentive + 
         age + female + partyIDstrength + (1 | refno), data = .)


# Bootstrap shares =====================

# create data frame with all combinations of the three concepts and control variables set to mean/mode
newdf = data.frame(expand.grid(incentive = 0:1, 
                               information = 0:1, 
                               cognitive_load = 0:1, 
                               age = mean(df_bes$age, na.rm = TRUE),
                               partyIDstrength = "fairly strong", # mode
                               female = 0,   # mode
                               refno = 245)) # mode

# bootstrap predicted probabilities with the created data frame
predFun <- function(fit) {
  predict(fit, newdf)
}


# Bootstrap for all Models
df_plot <- list(
  m1 = bootMer(model, nsim=500, FUN=predFun),
  m2 = bootMer(model_robust, nsim=500, FUN=predFun),
  m3 = bootMer(model_robust2, nsim=500, FUN=predFun),
  m4 = bootMer(model_robust3, nsim=500, FUN=predFun)
) 

# Create Data Frame
df_plot2 <- df_plot %>% 
  map(~ t(apply(.x$t, 2, quantile, c(0.025,0.05,0.50,0.95,0.975)))) %>% 
  map_dfr(as_tibble, .id = "Model") %>% 
  mutate(incentive = rep(newdf$incentive, 4),
         cognitive_load = rep(newdf$cognitive_load, 4),
         information = rep(newdf$information, 4)
  )  %>% mutate(
    information_lab = factor(ifelse(information==1, "High Information", "Low Information"),
                         levels = c("Low Information", "High Information")),
    incentive = factor(ifelse(incentive==0, "Low Incentives", "High Incentives"), 
                       levels = c("Low Incentives", "High Incentives")),
    cognitive_load_alb = ifelse(cognitive_load==1, "Low Cognitive Resources", "High Cognitive Resources"),
    condition = as.factor(paste0(as.character(information), " \n ", as.character(cognitive_load)))
  ) %>% 
  mutate(label = case_when(information == 0 & cognitive_load == 0 ~ "Control",
                           information == 1 & cognitive_load == 0 ~ "Inf.",
                           information == 0 & cognitive_load == 1 ~ "Cogn. Res.",
                           information == 1 & cognitive_load == 1 ~ "Inf. + Cogn. Res."), 
         label = factor(label, 
                        levels = rev(c("Inf. + Cogn. Res.", "Inf.", "Cogn. Res.","Control")),
                        labels = rev(c("Inf. + \n Cogn. Res.", "Inf.", "Cogn. Res.","Control")),
                        )) %>%
  mutate(  Model = recode_factor(Model,
                               m3 = "Model 1:\nMedia Exposure &\nEducation",
                               m1 = "Model 2:\nMedia Exposure &\nDependent in Household",
                               m4 = "Model 3:\nPolitical Knowledge &\nEducation",
                               m2 = "Model 4:\nPolitical Knowledge &\nDependent in Household")) 


# Plot
ggplot(df_plot2, aes(y = label, x = `50%`, 
                    xmin = `2.5%`, xmax = `97.5%`)) +
  facet_grid(Model~incentive) + coord_flip() +
  geom_col(position = "dodge") +
  geom_errorbarh(height = 0, position = position_dodge(width = .9)) +
  geom_point() +
  theme_minimal() + 
  labs(y = "", x = "Share Strategic Vote", fill = "Information") +
  scale_x_continuous(labels = scales::percent, breaks = c(0,.2,.4)) +
  theme(text = element_text(size = 16), 
        legend.position = "bottom") +
  scale_fill_brewer()


ggsave("fig_B2.pdf", width=12, height=12)



# Combine marginal effects in single plot ---------------------------------


# Function to obtain Marginal effects
calc_me <- function(model){
  
  # Define matrix with cases 
  d_test <- expand.grid("information"=0:1, 
                        "cognitive_load"=0:1, 
                        "incentive"=0:1)
  xnew <- model.matrix(~ -1 + information * cognitive_load * incentive, d_test)
  xnew <- xnew[,-which(colnames(xnew)=="incentive")]

  # Estimates and VarCov
  par <- fixef(model)[colnames(xnew)]
  V <- vcov(model)[colnames(xnew),colnames(xnew)]

  # N
  N <- nobs(model)
  
  # Calculate marginal effect and se
  df_est <- cbind(data.frame(
    "est"=c(xnew %*% par),
    "se" = sqrt(diag(xnew %*% V %*% t(xnew))),
    "incentive"=c(rep(0,4),rep(1,4))
  ), xnew) %>%
    mutate(
      "low" = est - se* qt(0.975,N),
      "hgh" = est + se* qt(0.975,N),
      "low2" = est - se* qt(0.95,N),
      "hgh2" = est + se* qt(0.95,N),
    )
  
  # return
  return(df_est)
  
}




# Calculate Marginal Effects
df_me <- list(
  m1 = calc_me(model),
  m2 = calc_me(model_robust),
  m3 = calc_me(model_robust2),
  m4 = calc_me(model_robust3)
) 

# Plot Results
df_plot2 <- df_me %>% 
  map_dfr(as_tibble, .id = "Model") %>% 
  mutate(incentive  = recode_factor(incentive, 
                                    `1` = "High Incentive",
                                    `0` = "Low Incentive")) %>% 
  mutate(label = case_when(information == 0 & cognitive_load == 0 ~ "Control",
                           information == 1 & cognitive_load == 0 ~ "Inf.",
                           information == 0 & cognitive_load == 1 ~ "Cogn. Res.",
                           information == 1 & cognitive_load == 1 ~ "Inf. + Cogn. Res."), 
         label = factor(label, 
                        levels = rev(c("Inf. + Cogn. Res.", "Inf.", "Cogn. Res.")),
                        labels = rev(c("Information &\nCognitive Resources", "Information", "Cognitive Resources"))),
         Model = recode_factor(Model,
                               m3 = "Model 1:\nMedia Exposure &\nEducation",
                               m1 = "Model 2:\nMedia Exposure &\nDependent in Household",
                               m4 = "Model 3:\nPolitical Knowledge &\nEducation",
                               m2 = "Model 4:\nPolitical Knowledge &\nDependent in Household")) %>%
  filter(label != "Control")


ggplot(df_plot2,aes(x = est, y = label,shape=Model)) +
  geom_point(position = position_dodge(width = .3)) +
  geom_errorbarh(aes(xmin = low, xmax = hgh), height = .1, position = position_dodge(width = .3)) +
  geom_errorbarh(aes(xmin = low2, xmax = hgh2), height = 0, position = position_dodge(width = .3)) +
  facet_grid( ~ incentive) + 
  labs(y = "", x = "Effect of High Levels of Information and Cognitive Ressources on Strategic Voting\n(vs. Low Levels in Control Condition)") +
  theme_minimal() +
  geom_vline(aes(xintercept=0), col="red",alpha=0.2) +
  theme(text = element_text(size = 16),legend.position = "bottom")
  
ggsave("fig_B1.pdf",width=12, height=6)



# Regression table including all 4 models ---------------------------------

# generate nice output table of the model
texreg(list(model_robust2, model, model_robust3, model_robust), 
       file="tab_B2.tex", 
       custom.coef.map = list(
         "information" = "Information", 
         "information_alternative" = "Information", 
         "cognitive_load" = "Cognitive Resources", 
         "cognitive_load_alternative" = "Cognitive Resources", 
         "incentive" = "Incentive",
         "information:incentive" = "Information x Incentive",
         "information_alternative:incentive" = "Information x Incentive",
         "cognitive_load:incentive" = "Cognitive Resources x Incentive",
         "cognitive_load_alternative:incentive" = "Cognitive Resources x Incentive",
         "information:cognitive_load:incentive" = "Information x Cognitive Resources x Incentive",
         "information_alternative:cognitive_load:incentive" = "Information x Cognitive Resources x Incentive",
         "information:cognitive_load_alternative:incentive" = "Information x Cognitive Resources x Incentive",
         "information_alternative:cognitive_load_alternative:incentive" = "Information x Cognitive Resources x Incentive",
         "age" = "Age", 
         "female" = "Female", 
         "partyIDstrengthnot very strong" = "Party ID: not very strong",
         "partyIDstrengthvery strong" = "Party ID: very strong",
         "(Intercept)" = "Constant"),
       caption = "Regression Results Observational Study",
       label = "table:bes",
       float.pos = "ht")

